***Calculate Price indexes***
* Session setup for the price-index pipeline: clears memory, moves to the
* data directory and raises Stata limits.
clear all



*Set root data directory
* Fill in the data directory after the macro name. Later commands build
* paths by direct concatenation with this macro, so the value must end
* with a path separator.
local rootdir
* Only change directory when a root was supplied: cd with an empty
* argument is platform-dependent (on Unix and Mac, cd without a directory
* name moves to the home directory), so an unset root would otherwise
* silently redirect every relative file name used below.
if "`rootdir'" != "" {
	cd "`rootdir'"
}


* NOTE(review): set mem is presumably a no-op on recent Stata releases,
* which manage memory automatically - confirm before removing.
set mem 12g
set maxvar 10000




* Build item-level prices (median unit values), expenditure totals and
* purchase counts for rounds 43, 55, 61 and 66 at every aggregation level.
* One output file per round: pricesNN_agg_sigma.
local list grains pulses milk oil meat veg fru sugar bev proc intox light cloth

* Aggregation levels: output-variable prefix and the by-group that defines it.
* The order of this list is the order in which the statistics are computed.
local levels ai airu s sru r rru d dru
local by_ai subround
local by_airu sector subround
local by_s statenamedist subround
local by_sru statenamedist sector subround
local by_r regioncode subround
local by_rru regioncode sector subround
local by_d statenamedist districtnamedist subround
local by_dru statenamedist districtnamedist sector subround

foreach k in 43 55 61 66 {
	use round`k'_edit, clear
	* segment and substratum are created as constants when the round lacks them
	cap: gen segment=1
	cap: gen substratum=1
	drop hq* hx*

	* household characteristics; only matched households are kept
	merge 1:1 sector subround region subsample fsu substratum segment hhno using round`k'_hh, keepusing(hhsize d* mult *mpce*)
	keep if _merge==3
	drop _merge

	merge 1:1 sector subround region subsample fsu substratum segment hhno using mult`k', keepusing(regioncode)
	keep if _merge==3
	drop _merge

	gen statecode=floor(region/10)

	cap: gen district=.
	cap: gen stratum=.

	cap: gen districtcode=district
	cap: replace districtcode=stratum if round==50

	* fill missing or zero district codes from other households in the same fsu and sector
	sort sector region fsu
	bysort fsu sector: egen minddist=min(districtcode)
	bysort fsu sector: egen maxddist=max(districtcode)
	replace districtcode=minddist if districtcode==.
	replace districtcode=maxddist if districtcode==0 & maxddist~=.

	* attach state and district names for this round
	* NOTE(review): merge m:m pairs rows by position within each key and is
	* discouraged by the Stata manual. If the using file is unique on
	* statecode districtcode this should be m:1 - confirm against the data.
	merge m:m statecode districtcode using "`rootdir'nss`k'_nm61"
	drop if fsu==.
	drop if _merge==2
	drop _merge

	ren statename statename`k'
	ren districtname districtname`k'

	* map round-specific names to the harmonised statenamedist and districtnamedist
	* NOTE(review): same m:m caveat as above.
	merge m:m statename`k' districtname`k' using "`rootdir'district_conversion"
	drop if fsu==.
	drop if _merge==2
	drop _merge

	replace statenamedist=statename`k' if statenamedist==""
	replace statenamedist="Goa, Daman & Diu" if statenamedist=="Goa" | statenamedist=="Daman & Diu"

	* households without positive total and food expenditure are excluded
	drop if totexp==0
	drop if totexp==.
	drop if xfood==0
	drop if xfood==.

	gen lnmpce=log(totexp/hhsize)
	gen lnhhsize=log(hhsize)
	egen medlnmpce=median(lnmpce)
	egen cluster=group(sector subround region subsample substratum segment fsu)

	foreach i in `list' {
		* number of items in the group, read from the first observation
		local nmax=n`i'tot[1]
		forvalues j=1(1)`nmax' {
			local it `i'_`j'

			*fix outliers
			gen uv`it'=x`it'/q`it'
			replace uv`it'=. if uv`it'<=0
			*use the 2% cutoff for outliers: despite their names, p99 and p1
			*hold the 98th and 2nd percentiles here
			egen p99`it'=pctile(uv`it'), p(98)
			egen p1`it'=pctile(uv`it'), p(2)
			replace uv`it'=p99`it' if uv`it'>p99`it' & uv`it'~=.
			replace uv`it'=p1`it' if uv`it'<p1`it' & uv`it'~=.

			****Quality correction (not in paper)
			gen luv`it'=log(uv`it')
			cap: areg luv`it' lnmpce lnhhsize d*ratio*, absorb(cluster)
			* Adjust only when areg succeeded for this item. After a failed
			* areg the stored estimates may still be those of an earlier item,
			* and using them would apply the wrong coefficient.
			if _rc==0 {
				cap: replace luv`it'=luv`it'-(_b[lnmpce]*(lnmpce-medlnmpce))
			}

			*this prevents negative and really weird values
			replace luv`it'=log(p99`it') if luv`it'>log(p99`it') & luv`it'~=.
			replace luv`it'=log(p1`it') if luv`it'<log(p1`it') & luv`it'~=.
			gen quv`it'=exp(luv`it')
			drop p99* p1* luv*

			* Purchase indicator. Missing compares greater than any number in
			* Stata, so missing expenditure must be excluded explicitly or it
			* would be counted as a purchase.
			gen presence`it'=0
			replace presence`it'=1 if x`it'>0 & x`it'<.

			*replace subround=1

			***do for every aggregation***
			* p: median unit value, qp: median quality-adjusted unit value,
			* agg: total of expenditure times mult, pnum: total of presence times mult
			foreach lev of local levels {
				bysort `by_`lev'': egen `lev'_p`it'=median(uv`it')
				bysort `by_`lev'': egen `lev'_qp`it'=median(quv`it')
				bysort `by_`lev'': egen `lev'_agg`it'=total(x`it'*mult)
				bysort `by_`lev'': egen `lev'_pnum`it'=total(presence`it'*mult)
			}

			drop x`it' q`it' *uv`it' presence`it'
		}
	}

	*this will be useful for calculating real incomes later
	foreach lev of local levels {
		bysort `by_`lev'': egen `lev'_numhh=total(mult)
	}

	* mult-weighted mean of mpce at each level
	foreach lev of local levels {
		bysort `by_`lev'': egen `lev'_mpce=total(mult*mpce)
		replace `lev'_mpce=`lev'_mpce/`lev'_numhh
	}

	**this will keep states with no district names, should also keep every unique district since statecode is more diffuse than statename (esp. in later rounds when states get split up)
	duplicates drop subround statenamedist districtnamedist regioncode sector, force
	keep subround round statenamedist districtnamedist regioncode sector *_p*_* *_qp*_* *_agg*_* n*tot *_numhh *_mpce *_pnum*
	save prices`k'_agg_sigma, replace
}



* Same aggregation as for the later rounds, applied to round 38. This round
* has no district concordance: state names are assigned from statecode and
* only all-India, region and state levels are produced.
* Output file: prices38_agg_sigma.
local list grains pulses milk oil meat veg fru sugar bev proc intox light cloth

* by-group for each aggregation-level prefix
local by_ai subround
local by_airu sector subround
local by_r regioncode subround
local by_rru regioncode sector subround
local by_s statenamedist subround
local by_sru statenamedist sector subround

foreach k in 38 {
	use round`k'_edit, clear
	* segment and substratum are created as constants when the round lacks them
	cap: gen segment=1
	cap: gen substratum=1
	drop hq* hx*

	merge 1:1 sector subround region subsample fsu substratum segment hhno using round`k'_hh, keepusing(hhsize d* mult *mpce*)
	keep if _merge==3
	drop _merge

	merge 1:1 sector subround region subsample fsu substratum segment hhno using mult`k', keepusing(regioncode)
	keep if _merge==3
	drop _merge

	gen statecode=floor(region/10)
	drop if fsu==.

	* households without positive total and food expenditure are excluded
	drop if totexp==0
	drop if totexp==.
	drop if xfood==0
	drop if xfood==.

	gen lnmpce=log(totexp/hhsize)
	gen lnhhsize=log(hhsize)
	egen medlnmpce=median(lnmpce)
	egen cluster=group(sector subround region subsample substratum segment fsu)

	* state names keyed on statecode, spelled as in the later rounds
	gen statenamedist=""
	replace statenamedist="Andhra Pradesh" if statecode==2
	replace statenamedist="Assam" if statecode==3
	replace statenamedist="Bihar" if statecode==4
	replace statenamedist="Gujarat" if statecode==5
	replace statenamedist="Haryana" if statecode==6
	replace statenamedist="Himachal Pradesh" if statecode==7
	replace statenamedist="Jammu & Kashmir" if statecode==8
	replace statenamedist="Karnataka" if statecode==9
	replace statenamedist="Kerala" if statecode==10
	replace statenamedist="Madhya Pradesh" if statecode==11
	replace statenamedist="Maharashtra" if statecode==12
	replace statenamedist="Manipur" if statecode==13
	replace statenamedist="Meghalaya" if statecode==14
	replace statenamedist="Nagaland" if statecode==15
	replace statenamedist="Orissa" if statecode==16
	replace statenamedist="Punjab" if statecode==17
	replace statenamedist="Rajasthan" if statecode==18
	replace statenamedist="Sikkim" if statecode==19
	replace statenamedist="Tamil Nadu" if statecode==20
	replace statenamedist="Tripura" if statecode==21
	replace statenamedist="Uttar Pradesh" if statecode==22
	replace statenamedist="West Bengal" if statecode==23
	replace statenamedist="Andaman & Nicobar Islands" if statecode==24
	replace statenamedist="Arunachal Pradesh" if statecode==25
	replace statenamedist="Chandigarh" if statecode==26
	replace statenamedist="Dadra & Nagar Haveli" if statecode==27
	replace statenamedist="Delhi" if statecode==28
	replace statenamedist="Goa, Daman & Diu" if statecode==29
	replace statenamedist="Lakshadweep" if statecode==30
	replace statenamedist="Mizoram" if statecode==31
	replace statenamedist="Pondicherry" if statecode==32

	foreach i in `list' {
		* number of items in the group, read from the first observation
		local nmax=n`i'tot[1]
		forvalues j=1(1)`nmax' {
			local it `i'_`j'

			*fix outliers: unit values are clamped to the 1st and 99th percentiles
			gen uv`it'=x`it'/q`it'
			replace uv`it'=. if uv`it'<=0
			egen p99`it'=pctile(uv`it'), p(99)
			egen p1`it'=pctile(uv`it'), p(1)
			replace uv`it'=p99`it' if uv`it'>p99`it' & uv`it'~=.
			replace uv`it'=p1`it' if uv`it'<p1`it' & uv`it'~=.

			****Quality correction (not in paper)
			gen luv`it'=log(uv`it')

			cap: areg luv`it' lnmpce lnhhsize d*ratio*, absorb(cluster)
			* Adjust only when areg succeeded for this item. After a failed
			* areg the stored estimates may still be those of an earlier item,
			* and using them would apply the wrong coefficient.
			if _rc==0 {
				cap: replace luv`it'=luv`it'-(_b[lnmpce]*(lnmpce-medlnmpce))
			}
			replace luv`it'=log(p99`it') if luv`it'>log(p99`it') & luv`it'~=.
			replace luv`it'=log(p1`it') if luv`it'<log(p1`it') & luv`it'~=.
			gen quv`it'=exp(luv`it')
			drop p99* p1* luv*

			* Purchase indicator. Missing compares greater than any number in
			* Stata, so missing expenditure must be excluded explicitly or it
			* would be counted as a purchase.
			gen presence`it'=0
			replace presence`it'=1 if x`it'>0 & x`it'<.

			*replace subround=1
			***do for every aggregation***
			* p: median unit value, qp: median quality-adjusted unit value,
			* agg: total of expenditure times mult, pnum: total of presence times mult
			foreach lev in ai airu r rru s sru {
				bysort `by_`lev'': egen `lev'_p`it'=median(uv`it')
				bysort `by_`lev'': egen `lev'_qp`it'=median(quv`it')
				bysort `by_`lev'': egen `lev'_agg`it'=total(x`it'*mult)
				bysort `by_`lev'': egen `lev'_pnum`it'=total(presence`it'*mult)
			}

			drop x`it' q`it' *uv`it' presence`it'
		}
	}

	*this will be useful for calculating real incomes later
	foreach lev in ai airu s sru r rru {
		bysort `by_`lev'': egen `lev'_numhh=total(mult)
	}

	* mult-weighted mean of mpce at each level
	foreach lev in ai airu r rru s sru {
		bysort `by_`lev'': egen `lev'_mpce=total(mult*mpce)
		replace `lev'_mpce=`lev'_mpce/`lev'_numhh
	}

	* one row per region, sector and subround is kept
	duplicates drop regioncode sector subround, force
	keep subround round statenamedist regioncode sector *_p*_* *_qp*_* *_agg*_* n*tot  *_numhh *_mpce *_pnum*


	save prices`k'_agg_sigma, replace
}







****File for generating prices indexes from aggregated data on expenditures and prices*******





*********Within region, over time Price Index************


* Stack the per-round aggregate files into one panel, starting with round 38.
use prices38_agg_sigma, clear
foreach rnd of numlist 43 55 61 66 {
	append using prices`rnd'_agg_sigma
}

* Items 17 to 24 of the fru group are copied to positions 10 to 17 of the
* proc group (region-level expenditure, quality-adjusted price and price),
* the group sizes are updated, and the original fru columns are removed.
forvalues src = 17/24 {
	local dst = `src' - 7
	foreach stat in agg qp p {
		gen r_`stat'proc_`dst' = r_`stat'fru_`src'
	}
}

replace nfrutot = 16
replace nproctot = nproctot + 8

forvalues src = 17/24 {
	drop *fru_`src'
}

* one row per region and round
duplicates drop regioncode round, force

* Settings read by the index code that follows.
* threshold: bound on the absolute log price relative
local threshold = 3

* food groups, then non-food groups
local list grains pulses milk oil meat veg fru sugar bev proc
local list2 intox light cloth

* variable prefix of the compared cells and of the base cells
local prefix r_
local baseprefix r_

* base values are spread within this by-group
local agg regioncode

gen all = 1

* round 38 is the base period
gen base = 1 if round == 38

gen totexp = 0
gen btotexp = 0
* running expenditure totals over items with a usable price relative
gen sumagg = 0
gen sumbagg = 0

* For every item: log price relative to the base row of the same by-group,
* bounded at plus and minus the threshold, plus running expenditure totals.
foreach g in `list' `list2' {
	* group size is read from the first observation
	local last = n`g'tot[1]
	forvalues num = 1/`last' {
		local it `g'_`num'

		* base-row values of price, quality-adjusted price and expenditure
		foreach s in p qp agg {
			gen base`s'`it' = `baseprefix'`s'`it' if base==1
		}

		* spread the base values to every row of the by-group
		foreach s in p qp agg {
			bysort `agg': egen b`s'`it' = min(base`s'`it')
		}

		gen relp`it' = log(`prefix'p`it'/bp`it')
		gen relqp`it' = log(`prefix'qp`it'/bqp`it')

		* bound the log relative; missing values stay missing
		replace relp`it' = max(-`threshold', min(`threshold', relp`it')) if relp`it' != .

		* fall back to the unadjusted relative when the adjusted one is missing
		replace relqp`it' = relp`it' if relqp`it' == .

		replace relqp`it' = max(-`threshold', min(`threshold', relqp`it')) if relqp`it' != .

		drop *_p`it' *_qp`it' base*_*

		* items without a price relative carry no expenditure weight
		replace `prefix'agg`it' = 0 if `prefix'agg`it' == . | relp`it' == .
		replace bagg`it' = 0 if bagg`it' == . | relp`it' == .

		replace sumagg = sumagg + `prefix'agg`it'
		replace sumbagg = sumbagg + bagg`it'
	}
}



* Item weight: average of the current and base expenditure shares.
* Items without a price relative get weight zero and a zero log relative.
foreach g in `list' `list2' {
	local last = n`g'tot[1]
	forvalues num = 1/`last' {
		local it `g'_`num'
		gen weight`it' = cond(relp`it' == ., 0, ((`prefix'agg`it'/sumagg) + (bagg`it'/sumbagg))/2)
		replace relp`it' = 0 if relp`it' == .
		replace relqp`it' = 0 if relqp`it' == .
		drop `prefix'agg`it' bagg`it'
	}
}
drop sumagg sumbagg

* Weight totals: all items, food items (groups in list) and each group.
gen weight = 0
gen weightfood = 0

foreach g in `list' `list2' {
	* 1 when the group belongs to the food list, 0 otherwise
	local isfood : list g in list
	local last = n`g'tot[1]
	gen weight`g' = 0
	forvalues num = 1/`last' {
		local it `g'_`num'
		replace weight`g' = weight`g' + weight`it'
		if `isfood' {
			replace weightfood = weightfood + weight`it'
		}
		replace weight = weight + weight`it'
	}
}

* Log indexes are accumulated as weighted sums of log price relatives,
* overall, for food and per group, then exponentiated.
gen pindex = 0
gen pindexfood = 0
gen qpindex = 0
gen qpindexfood = 0

foreach g in `list' `list2' {
	* 1 when the group belongs to the food list, 0 otherwise
	local isfood : list g in list
	local last = n`g'tot[1]
	gen pindex`g' = 0
	gen qpindex`g' = 0
	forvalues num = 1/`last' {
		local it `g'_`num'

		* group index: skipped when the group has zero weight
		replace pindex`g' = pindex`g' + ((weight`it'/weight`g') * relp`it') if weight`g' ~= 0
		replace qpindex`g' = qpindex`g' + ((weight`it'/weight`g') * relqp`it') if weight`g' ~= 0

		if `isfood' {
			replace pindexfood = pindexfood + ((weight`it'/weightfood) * relp`it')
			replace qpindexfood = qpindexfood + ((weight`it'/weightfood) * relqp`it')
		}

		replace pindex = pindex + ((weight`it'/weight) * relp`it')
		replace qpindex = qpindex + ((weight`it'/weight) * relqp`it')
	}
}

* back from logs to index levels
foreach stem in pindex qpindex {
	foreach g in food `list' `list2' {
		replace `stem'`g' = exp(`stem'`g')
	}
	replace `stem' = exp(`stem')
}


bysort round sector: summ *pindex*

* keep one index row per region and round and store it
keep regioncode round *pindex*
duplicates drop regioncode round, force

save pindex_r_revised, replace






********Rural versus Urban within region-round**********


* Stack the per-round aggregate files into one panel, starting with round 38.
use prices38_agg_sigma, clear
foreach rnd of numlist 43 55 61 66 {
	append using prices`rnd'_agg_sigma
}

* Items 17 to 24 of the fru group are copied to positions 10 to 17 of the
* proc group (region-by-sector expenditure, quality-adjusted price and
* price), the group sizes are updated, and the original fru columns are removed.
forvalues src = 17/24 {
	local dst = `src' - 7
	foreach stat in agg qp p {
		gen rru_`stat'proc_`dst' = rru_`stat'fru_`src'
	}
}

replace nfrutot = 16
replace nproctot = nproctot + 8

forvalues src = 17/24 {
	drop *fru_`src'
}

* one row per region, sector and round
duplicates drop regioncode sector round, force

* Settings read by the index code that follows.
* threshold: bound on the absolute log price relative
local threshold = 3

* food groups, then non-food groups
local list grains pulses milk oil meat veg fru sugar bev proc
local list2 intox light cloth

* variable prefix of the compared cells and of the base cells
local prefix rru_
local baseprefix rru_

* base values are spread within each region and round
local agg regioncode round

gen all = 1

* rows with sector equal to 1 are the base
gen base = 1 if sector == 1

gen totexp = 0
gen btotexp = 0
* running expenditure totals over items with a usable price relative
gen sumagg = 0
gen sumbagg = 0

* For every item: log price relative to the base row of the same by-group,
* bounded at plus and minus the threshold, plus running expenditure totals.
foreach g in `list' `list2' {
	* group size is read from the first observation
	local last = n`g'tot[1]
	forvalues num = 1/`last' {
		local it `g'_`num'

		* base-row values of price, quality-adjusted price and expenditure
		foreach s in p qp agg {
			gen base`s'`it' = `baseprefix'`s'`it' if base==1
		}

		* spread the base values to every row of the by-group
		foreach s in p qp agg {
			bysort `agg': egen b`s'`it' = min(base`s'`it')
		}

		gen relp`it' = log(`prefix'p`it'/bp`it')
		gen relqp`it' = log(`prefix'qp`it'/bqp`it')

		* Maximum price difference: the log relative is bounded at the
		* threshold. A threshold of 3 caps the price ratio at exp(3),
		* roughly a factor of 20. Missing values stay missing.
		replace relp`it' = max(-`threshold', min(`threshold', relp`it')) if relp`it' != .

		* fall back to the unadjusted relative when the adjusted one is missing
		replace relqp`it' = relp`it' if relqp`it' == .

		replace relqp`it' = max(-`threshold', min(`threshold', relqp`it')) if relqp`it' != .

		drop *_p`it' *_qp`it' base*_*

		* items without a price relative carry no expenditure weight
		replace `prefix'agg`it' = 0 if `prefix'agg`it' == . | relp`it' == .
		replace bagg`it' = 0 if bagg`it' == . | relp`it' == .

		replace sumagg = sumagg + `prefix'agg`it'
		replace sumbagg = sumbagg + bagg`it'
	}
}



* Item weight: average of the current and base expenditure shares.
* Items without a price relative get weight zero and a zero log relative.
foreach g in `list' `list2' {
	local last = n`g'tot[1]
	forvalues num = 1/`last' {
		local it `g'_`num'
		gen weight`it' = cond(relp`it' == ., 0, ((`prefix'agg`it'/sumagg) + (bagg`it'/sumbagg))/2)
		replace relp`it' = 0 if relp`it' == .
		replace relqp`it' = 0 if relqp`it' == .
		drop `prefix'agg`it' bagg`it'
	}
}
drop sumagg sumbagg

* Weight totals: all items, food items (groups in list) and each group.
gen weight = 0
gen weightfood = 0

foreach g in `list' `list2' {
	* 1 when the group belongs to the food list, 0 otherwise
	local isfood : list g in list
	local last = n`g'tot[1]
	gen weight`g' = 0
	forvalues num = 1/`last' {
		local it `g'_`num'
		replace weight`g' = weight`g' + weight`it'
		if `isfood' {
			replace weightfood = weightfood + weight`it'
		}
		replace weight = weight + weight`it'
	}
}

* Log indexes are accumulated as weighted sums of log price relatives,
* overall, for food and per group, then exponentiated.
gen pindex = 0
gen pindexfood = 0
gen qpindex = 0
gen qpindexfood = 0

foreach g in `list' `list2' {
	* 1 when the group belongs to the food list, 0 otherwise
	local isfood : list g in list
	local last = n`g'tot[1]
	gen pindex`g' = 0
	gen qpindex`g' = 0
	forvalues num = 1/`last' {
		local it `g'_`num'

		* group index: skipped when the group has zero weight
		replace pindex`g' = pindex`g' + ((weight`it'/weight`g') * relp`it') if weight`g' ~= 0
		replace qpindex`g' = qpindex`g' + ((weight`it'/weight`g') * relqp`it') if weight`g' ~= 0

		if `isfood' {
			replace pindexfood = pindexfood + ((weight`it'/weightfood) * relp`it')
			replace qpindexfood = qpindexfood + ((weight`it'/weightfood) * relqp`it')
		}

		replace pindex = pindex + ((weight`it'/weight) * relp`it')
		replace qpindex = qpindex + ((weight`it'/weight) * relqp`it')
	}
}

* back from logs to index levels
foreach stem in pindex qpindex {
	foreach g in food `list' `list2' {
		replace `stem'`g' = exp(`stem'`g')
	}
	replace `stem' = exp(`stem')
}


bysort `agg' sector: summ *pindex*

* keep one index row per by-group and sector and store it
keep `agg' sector *pindex*
duplicates drop `agg' sector, force

save pindex_rur_revised, replace

















*****District level********

* District-level prices for rounds 43, 55, 61 and 66. For every item this
* stores the district median unit value and expenditure total next to the
* whole-sample median and total of the same round (the base variables).
* One output file per round: pricesNN_districts_revised.
local list grains pulses milk oil meat veg fru sugar bev proc intox light cloth

* a district is identified by harmonised state and district name
local agg statenamedist districtnamedist

foreach k in 43 55 61 66 {
	use round`k'_edit, clear
	* segment and substratum are created as constants when the round lacks them
	cap: gen segment=1
	cap: gen substratum=1
	drop hq* hx*

	merge 1:1 sector subround region subsample fsu substratum segment hhno using round`k'_hh, keepusing(hhsize d* mult)
	keep if _merge==3
	drop _merge

	gen statecode=floor(region/10)

	cap: gen district=.
	cap: gen stratum=.

	cap: gen districtcode=district
	cap: replace districtcode=stratum if round==50

	* fill missing or zero district codes from other households in the same fsu and sector
	sort sector region fsu
	bysort fsu sector: egen minddist=min(districtcode)
	bysort fsu sector: egen maxddist=max(districtcode)
	replace districtcode=minddist if districtcode==.
	replace districtcode=maxddist if districtcode==0 & maxddist~=.

	* attach names; here only households matched to a named district are kept
	* NOTE(review): merge m:m pairs rows by position within each key and is
	* discouraged by the Stata manual. If the using file is unique on
	* statecode districtcode this should be m:1 - confirm against the data.
	merge m:m statecode districtcode using "`rootdir'nss`k'_nm61"
	drop if fsu==.
	keep if _merge==3
	drop _merge

	ren statename statename`k'
	ren districtname districtname`k'

	* NOTE(review): same m:m caveat as above.
	merge m:m statename`k' districtname`k' using "`rootdir'district_conversion"
	drop if fsu==.
	keep if _merge==3
	drop _merge

	* households without positive total and food expenditure are excluded
	drop if totexp==0
	drop if totexp==.
	drop if xfood==0
	drop if xfood==.

	gen lnmpce=log(totexp/hhsize)
	gen lnhhsize=log(hhsize)
	egen medlnmpce=median(lnmpce)
	egen cluster=group(sector subround region subsample substratum segment fsu)

	foreach i in `list' {
		* number of items in the group, read from the first observation
		local nmax=n`i'tot[1]
		forvalues j=1(1)`nmax' {
			local it `i'_`j'

			*fix outliers: unit values are clamped to the 1st and 99th percentiles
			gen uv`it'=x`it'/q`it'
			replace uv`it'=. if uv`it'<=0
			egen p99`it'=pctile(uv`it'), p(99)
			egen p1`it'=pctile(uv`it'), p(1)
			replace uv`it'=p99`it' if uv`it'>p99`it' & uv`it'~=.
			replace uv`it'=p1`it' if uv`it'<p1`it' & uv`it'~=.
			drop p99* p1*

			**Quality correction (not in the paper)
			gen luv`it'=log(uv`it')
			cap: areg luv`it' lnmpce lnhhsize d*ratio*, absorb(cluster)
			* Adjust only when areg succeeded for this item. After a failed
			* areg the stored estimates may still be those of an earlier item,
			* and using them would apply the wrong coefficient.
			if _rc==0 {
				cap: replace luv`it'=luv`it'-(_b[lnmpce]*(lnmpce-medlnmpce))
			}
			* the adjusted log unit value is clamped to its own 1st and 99th percentiles
			egen p99`it'=pctile(luv`it'), p(99)
			egen p1`it'=pctile(luv`it'), p(1)
			replace luv`it'=p99`it' if luv`it'>p99`it' & luv`it'~=.
			replace luv`it'=p1`it' if luv`it'<p1`it' & luv`it'~=.
			gen quv`it'=exp(luv`it')
			drop p99* p1* luv*

			* district medians
			bysort `agg': egen p`it'=median(uv`it')
			bysort `agg': egen qp`it'=median(quv`it')

			* whole-sample medians of this round
			egen basep`it'=median(uv`it')
			egen baseqp`it'=median(quv`it')

			*Generate expenditure weights

			bysort `agg': egen agg`it'=total(x`it'*mult)

			egen baseagg`it'=total(x`it'*mult)
			drop x`it' q`it' uv`it'
		}
	}

	* one row per district
	duplicates drop `agg', force
	keep `agg' round p*_* qp*_* *agg*_* base*_* n*tot
	save prices`k'_districts_revised, replace
}




* Stack the district-level price files of rounds 43, 55, 61 and 66.
use prices43_districts_revised, clear
foreach rnd of numlist 55 61 66 {
	append using prices`rnd'_districts_revised
}

* Settings read by the index code that follows.
* threshold: bound on the absolute log price relative
local threshold = 3

* food groups, then non-food groups
local list grains pulses milk oil meat veg fru sugar bev proc
local list2 intox light cloth

* round 43 is the base period
gen base = 1 if round == 43

gen totexp = 0
gen btotexp = 0
* running expenditure totals over items with a usable price relative
gen sumagg = 0
gen sumbagg = 0

* For every item: log of the district price relative to a single base value
* taken from the base-round rows, bounded at plus and minus the threshold,
* plus running expenditure totals.
foreach g in `list' `list2' {
	* group size is read from the first observation
	local last = n`g'tot[1]
	forvalues num = 1/`last' {
		local it `g'_`num'

		* created only when the base variable is not already in the data
		foreach s in p qp agg {
			cap: gen base`s'`it' = `s'`it' if base==1
		}

		foreach s in p qp agg {
			replace base`s'`it' = 0 if base`s'`it' == .
		}

		* base is missing outside the base round, so only base-round rows
		* enter the minimum
		foreach s in p qp agg {
			egen b`s'`it' = min(base`s'`it'*base)
		}

		gen relp`it' = log(p`it'/bp`it')
		gen relqp`it' = log(qp`it'/bqp`it')

		* bound the log relative; missing values stay missing
		replace relp`it' = max(-`threshold', min(`threshold', relp`it')) if relp`it' != .

		* fall back to the unadjusted relative when the adjusted one is missing
		replace relqp`it' = relp`it' if relqp`it' == .

		replace relqp`it' = max(-`threshold', min(`threshold', relqp`it')) if relqp`it' != .

		drop base*p`it' p`it' qp`it' baseagg`it'

		* items without a price relative carry no expenditure weight
		replace agg`it' = 0 if agg`it' == . | relp`it' == .
		replace bagg`it' = 0 if bagg`it' == . | relp`it' == .

		replace sumagg = sumagg + agg`it'
		replace sumbagg = sumbagg + bagg`it'
	}
}



* Item weight: average of the current and base expenditure shares.
* Items without a price relative get weight zero and a zero log relative.
foreach g in `list' `list2' {
	local last = n`g'tot[1]
	forvalues num = 1/`last' {
		local it `g'_`num'
		gen weight`it' = cond(relp`it' == ., 0, ((agg`it'/sumagg) + (bagg`it'/sumbagg))/2)
		replace relp`it' = 0 if relp`it' == .
		replace relqp`it' = 0 if relqp`it' == .
		drop agg`it' bagg`it'
	}
}
drop sumagg sumbagg

* Weight totals: all items, food items (groups in list) and each group.
gen weight = 0
gen weightfood = 0

foreach g in `list' `list2' {
	* 1 when the group belongs to the food list, 0 otherwise
	local isfood : list g in list
	local last = n`g'tot[1]
	gen weight`g' = 0
	forvalues num = 1/`last' {
		local it `g'_`num'
		replace weight`g' = weight`g' + weight`it'
		if `isfood' {
			replace weightfood = weightfood + weight`it'
		}
		replace weight = weight + weight`it'
	}
}

* Log indexes are accumulated as weighted sums of log price relatives,
* overall, for food and per group, then exponentiated.
gen pindex = 0
gen pindexfood = 0
gen qpindex = 0
gen qpindexfood = 0

foreach g in `list' `list2' {
	* 1 when the group belongs to the food list, 0 otherwise
	local isfood : list g in list
	local last = n`g'tot[1]
	gen pindex`g' = 0
	gen qpindex`g' = 0
	forvalues num = 1/`last' {
		local it `g'_`num'

		* group index: skipped when the group has zero weight
		replace pindex`g' = pindex`g' + ((weight`it'/weight`g') * relp`it') if weight`g' ~= 0
		replace qpindex`g' = qpindex`g' + ((weight`it'/weight`g') * relqp`it') if weight`g' ~= 0

		if `isfood' {
			replace pindexfood = pindexfood + ((weight`it'/weightfood) * relp`it')
			replace qpindexfood = qpindexfood + ((weight`it'/weightfood) * relqp`it')
		}

		replace pindex = pindex + ((weight`it'/weight) * relp`it')
		replace qpindex = qpindex + ((weight`it'/weight) * relqp`it')
	}
}

* back from logs to index levels
foreach stem in pindex qpindex {
	foreach g in food `list' `list2' {
		replace `stem'`g' = exp(`stem'`g')
	}
	replace `stem' = exp(`stem')
}


summ *pindex*, detail



* keep one index row per district and round and store it
keep statenamedist districtnamedist round *pindex*
duplicates drop statenamedist districtnamedist round, force

save pindex_districts_revised, replace

